Load packages we will be using today

We could load only when we need it, but I like knowing what packages we need, so that I can install if I don’t have them.

library(tidyverse)
library(patchwork)
library(readxl)
library(ggplot2) #We will primarily work with ggplot2 today, but lots of options

Read in data and prepare

#Read in the data
sea_chem<-read.table("sim_data.csv",header=T,sep=",",na.strings="NA",row.names=1)
samp_data<-read_xlsx("sim_data.xlsx","samples")

sea_comb<-merge(sea_chem,samp_data,by.x="row.names",by.y="sample_id")
#View(sea_comb) #note that the row names are now a column, we could change this, but for dataframes it's usually better to keep this valuable information in a column


#Create new column to group by based on the values of another
sea_comb$NH4_stat <- ifelse(sea_comb$NH4_nanoM<20,"depleted","replete")
unique(sea_comb$NH4_stat)
## [1] "replete"  "depleted"
# Check the data type and some basic stats
str(sea_comb)
## 'data.frame':    100 obs. of  8 variables:
##  $ Row.names: 'AsIs' chr  "S001" "S002" "S003" "S004" ...
##  $ NH4_nanoM: num  379 312 405 298 316 ...
##  $ NO3_nanoM: num  517 435 459 336 401 ...
##  $ Bact     : int  3210498 3607997 3393192 3316379 3759052 3255187 3976605 3749163 3538062 3942572 ...
##  $ Virus    : int  35013025 36342384 37485073 35002750 39974307 33830160 32538349 38923467 38681229 37545017 ...
##  $ location : chr  "coastal" "coastal" "coastal" "coastal" ...
##  $ depth    : num  10 10 10 10 10 10 10 10 10 10 ...
##  $ NH4_stat : chr  "replete" "replete" "replete" "replete" ...
summary(sea_comb)
##   Row.names           NH4_nanoM          NO3_nanoM           Bact        
##  Length:100         Min.   :  0.5491   Min.   : 327.3   Min.   : 280554  
##  Class :AsIs        1st Qu.: 13.1097   1st Qu.: 441.5   1st Qu.:1391627  
##  Mode  :character   Median : 52.6819   Median : 529.2   Median :2212338  
##                     Mean   :113.3298   Mean   :1086.9   Mean   :2122338  
##                     3rd Qu.:134.5551   3rd Qu.:1060.8   3rd Qu.:2908654  
##                     Max.   :449.6311   Max.   :3531.2   Max.   :3976605  
##      Virus            location             depth        NH4_stat        
##  Min.   : 2820997   Length:100         Min.   :10.0   Length:100        
##  1st Qu.:13928311   Class :character   1st Qu.:10.0   Class :character  
##  Median :21949386   Mode  :character   Median :50.0   Mode  :character  
##  Mean   :20726538                      Mean   :30.8                     
##  3rd Qu.:26669133                      3rd Qu.:50.0                     
##  Max.   :39974307                      Max.   :50.0
#Could also use the dplyr package (part of tidyverse) to summarize
sea_comb %>% #start with the dataset "sea_comb", then
  group_by(NH4_stat) %>% # group by the variable NH4_stat, then
  count() %>% # count the number of observations, then
  ungroup() #remove the grouping
## # A tibble: 2 × 2
##   NH4_stat     n
##   <chr>    <int>
## 1 depleted    35
## 2 replete     65
sea_comb %>%
  group_by(location) %>%
  summarise(mean_NH4 = mean(NH4_nanoM),
            sd_NH4 = sd(NH4_nanoM),
            mean_NO3 = mean(NO3_nanoM),
            sd_NO3 = sd(NO3_nanoM))
## # A tibble: 4 × 5
##   location  mean_NH4 sd_NH4 mean_NO3 sd_NO3
##   <chr>        <dbl>  <dbl>    <dbl>  <dbl>
## 1 coastal     351.    45.8      446.   56.2
## 2 gyre          3.70   1.79     603.   47.0
## 3 offshore     72.4    6.40     443.   41.5
## 4 openOcean    26.3    9.15    2856.  359.
# you could then convert this more easily into a table and export if desired

Exercise 1

#Create a summary table of the bacterial and viral counts, include the mean  and standard error for each location and depth

sea_comb %>%
  group_by(location, depth) %>%
  summarise(mean_Bact = mean(Bact),
            se_Bact = sd(Bact)/sqrt(n()),
            mean_Virus = mean(Virus),
            se_Virus = sd(Virus)/sqrt(n()))
## `summarise()` has grouped output by 'location'. You can override using the
## `.groups` argument.
## # A tibble: 8 × 6
## # Groups:   location [4]
##   location  depth mean_Bact se_Bact mean_Virus se_Virus
##   <chr>     <dbl>     <dbl>   <dbl>      <dbl>    <dbl>
## 1 coastal      10  3559338.  89821.  36047571.  743702.
## 2 coastal      50  2737640. 127603.  26075276. 1266930.
## 3 gyre         10   794588   50143.   7747159.  468649.
## 4 gyre         50   691567.  62726.   6393958.  683433.
## 5 offshore     10  2820644. 148187.  25350993. 1130446.
## 6 offshore     50  2608197. 100502.  25468211.  904790.
## 7 openOcean    10  1936196.  91479.  17877316.  728636.
## 8 openOcean    50  1878338.  74250.  21168506.  918826.

Bar Graphs, Histograms, Scatterplots

#Create a simple bar graph
ggplot(sea_comb, mapping = aes(x = NH4_stat)) +
  geom_bar()

#many options for customization will be available if you either call the help pages or hit tab

#Modify bar chart to show percent of samples that are in each group
bar<- ggplot(data = sea_comb, mapping = aes(x = NH4_stat)) + 
  geom_bar(aes(y = (..count..)/sum(..count..))) + 
  scale_y_continuous(name = "Percent", labels=scales::percent) 
bar

#Create a simple histogram
ggplot(sea_comb, aes(x = NH4_nanoM)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#Change the binwidth
hist_NH4<- ggplot(sea_comb, aes(x = NH4_nanoM)) +
  geom_histogram(binwidth = 10) # each bin covers 10nM

#Create a density plot
ggplot(sea_comb, aes(x = NH4_nanoM)) +
  geom_density()

#Histogram & density
# Histogram with kernel density
ggplot(sea_comb, aes(x = NH4_nanoM)) + 
  geom_histogram(aes(y = ..density..)) +
  geom_density()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#Create a scatterplot
scat_Micro <- ggplot(sea_comb,aes(x=Bact,y=Virus)) +
            geom_point()
scat_Micro

Plotting multiple figures together

#using bultin command par() & layout() cannot be used with ggplot2
attach(sea_comb)

# figures arranged in 3 rows and 1 columns
par(mfrow=c(3,1))
hist(NH4_nanoM)
hist(NO3_nanoM)
plot(Bact,Virus)

#Can change the margins of the figure
#mar=c(b,l,t,r) or mai for inches

par(mfrow=c(3,1),mar=c(2,2,2,0))
hist(NH4_nanoM)
hist(NO3_nanoM)
plot(Bact,Virus)

dev.off()
## null device 
##           1
par(mfrow=c(3,1))
hist(NH4_nanoM)
hist(NO3_nanoM)
plot(Bact,Virus)

par("mar")
## [1] 5.1 4.1 4.1 2.1
dev.off()
## null device 
##           1
#1 figure in row 1 and 2 figures in 2
layout(matrix(c(1,1,2,3),nrow=2,ncol=2,byrow=T))
hist(NH4_nanoM)
hist(NO3_nanoM)
plot(Bact,Virus)

#2 figure in row 1 and 1 figures in 2
layout(matrix(c(1,2,3,3),nrow=2,ncol=2,byrow=T),
       heights=c(1,2),
       widths=c(2,2)) # figs 1 & 2 in row 1, fig 3 in row 2
hist(NH4_nanoM)
hist(NO3_nanoM)
plot(Bact,Virus)

detach(sea_comb)


#Use patchwork package to plot graphs in one figure
(bar + hist_NH4) / scat_Micro

Saving figures

Color and Label Customizations

#Customize our histogram
hist_NH4 <- ggplot(sea_comb, aes(x = NH4_nanoM)) +
            geom_histogram(binwidth = 10, fill="red") # each bin covers 10nM

#Use built-in themes to further modify
hist_NH4 + theme_classic() #Change this up and see how it affects the graph

#Customize labels
#there are different ways to change the labels and breaks
hist_NH4 + scale_x_continuous(name = "Ammonium Concentration (nM)") +
  scale_y_continuous(name = "Number of Samples",
                     breaks = c(0,5,10,15,20,25)) +
  theme_light()

#Common mistake is to use the wrong scale
#h + scale_x_continuous(name = "Ammonium Concentration (nM)") +
 # scale_y_discrete(name = "Number of Samples",
  #                   breaks = c(0,5,10,15,20,25))  +
    #theme_light()

#Customize a more complex figure
comb_NH4<-ggplot(sea_comb, aes(x = NH4_nanoM)) + 
  geom_histogram(aes(y = ..density..),
                 binwidth = 10) +
  geom_density()+  
  #Next let's change the amount of space data will take up
  scale_y_continuous(name="Density",expand=expansion(mult=c(0.01,.01))) + #come back and adjust to low end from 0 to 0.1 , 0.01
  scale_x_continuous(name="Ammonium (NH4, nM)",expand=expansion(mult=c(0.01,0.01))) #comback and adjust the sides to 0.01, 0.01
comb_NH4

#Take more control over the theme of the plot
comb_NH4 +
  theme(panel.background = element_rect("white", "black", 2, "solid", "black"),
         panel.grid.major = element_line(size = 0.2, linetype = 'dashed',
                                colour = "gray"))

#Let's add some color!
#can use hexadecimal color codes instead of names http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/

comb_NH4_v2 <- 
  ggplot(sea_comb, aes(x = NH4_nanoM)) + 
  geom_histogram(aes(y = ..density..),
                 binwidth = 10,
                 colour="#999999", fill="#FFFFFF") + #changes the color of the line and the fill of the bars
  geom_density(lwd = 1,
               linetype = 2,
               colour = 2) +  #Changes the density line width, line type, and color
 
  scale_y_continuous(name="Density",expand=expansion(mult=c(0.01,.01))) +
  scale_x_continuous(name="Ammonium (NH4, nM)",expand=expansion(mult=c(0.01,0.01))) +
  
  theme_classic(base_size = 16) #modifies a pre-existing theme

comb_NH4_v2

#can use hexadecimal color codes instead of names http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/

Exercise 2

#1 change the bar colors of the bar chart to a blue of your choice using it's hex code

#2 change the theme of the newly colored bar chart

#3 Change the x & y labels of the chart in #2
 
#4 plot the new bar chart and the original in one figure and save it


new<- ggplot(sea_comb, mapping = aes(x = NH4_stat)) +
  geom_bar(fill="#6699CC")+
  theme_bw() +
  xlab("Nitrogen Status") +
  scale_y_continuous(name="# of Samples")

comp_bars<- bar + new

ggsave("comp_bar.png",comp_bars,dpi=100)
## Saving 7 x 5 in image

Customize graphs by grouping variables: color and point shapes

#Need to prepare grouping variables, typically need to tell ggplot2 a variable is a factor
unique(sea_comb$location) #note output
## [1] "coastal"   "offshore"  "openOcean" "gyre"
sea_comb$location<-as.factor(sea_comb$location)
unique(sea_comb$location) #note order of levels
## [1] coastal   offshore  openOcean gyre     
## Levels: coastal gyre offshore openOcean
scat_Nit2<-ggplot(sea_comb,aes(x=NO3_nanoM,y=NH4_nanoM,col=location)) + #color by location
            geom_point()
scat_Nit2 #uses default rainbow colors

          #order of location in legend could better represent our collection strategy

#Indicate the order the factors should be in
sea_comb$location<-factor(sea_comb$location,levels=c("coastal","offshore","openOcean","gyre"))
scat_Nit3<-ggplot(sea_comb,aes(x=NO3_nanoM,y=NH4_nanoM,col=location)) +
            geom_point()+
              scale_color_manual(values = c("#ca7dcc",
                                "#1b98e0",
                                "#353436",
                                "darkred")) #indicate the colors each level should use

scat_Nit3

#Change point shape to identify a second grouping variable (depth)
scat_Nit4<-ggplot(sea_comb,aes(x=NO3_nanoM,y=NH4_nanoM,col=location)) +
            geom_point(aes(shape=as.factor(depth),color=location),size=2)+ #note I changed the depth to a factor in place
              scale_shape_manual(values=c(3,16)) +
              scale_color_manual(values = c("#ca7dcc",
                                "#1b98e0",
                                "#353436",
                                "darkred")) +
          theme_bw()

#the legend labels cold be better!
scat_Nit4 + labs(color = "Location",shape="Water depth (m)") # Show them a new thing, change legend labels

#we could also move the legend
scat_Nit4 + labs(color = "Location",shape="Water depth (m)") +
  theme(legend.position = "top") #values allowed, left, top, right, bottom

scat_Nit4 + labs(color = "Location",shape="Water depth (m)") +
  theme(legend.position = c(0.7,0.7)) 

#give coordinates, values 0-1, 
#c(0,0) corresponds to the “bottom left”
#c(1,1) corresponds to the “top right” position.


scat_Nit4 + labs(color = "Location",shape="Water depth (m)") +
  theme(legend.position = c(0.7,0.7), legend.box="horizontal") #horizontally splits the legends

scat_Nit4 + labs(color = "Location",shape="Water depth (m)") +
  theme(legend.position = c(0.7,0.7), legend.box="horizontal") +
  guides(color=guide_legend(order=2),
         shape=guide_legend(order=1)) #change the order of each guide

Exercise 3

#Modify the dataset and code to produce a NO3 x NH4 scatterplot that does the following
 
  #1 Use color to identify NH4 replete samples
  #2 Use shape to identify NO3 replete samples where NO3 > 2microMolar
  #3 Improve the labels for the graph
  #4 Choose a different theme for the graph


#First need to create a new grouping variable
sea_comb$NO3_stat<-ifelse(sea_comb$NO3_nanoM>2000,"replete","depleted")

#Create a ggplot scatter plot, x =NO3, y=NH4, color by NH4_stat, but the shape is the new NO3_stat variable
p<-ggplot(sea_comb,aes(x=NO3_nanoM,y=NH4_nanoM,col=NH4_stat)) +
            geom_point(aes(shape=NO3_stat,color=NH4_stat),size=2)+
              scale_shape_manual(values=c(17,16)) +
              scale_color_manual(values = c("red",
                                "blue"))

p + xlab("Nitrate (nM)") +
  ylab("Ammonium (nM)") +
  theme_light(base_rect_size = 2) +
  labs(color = "Ammonium",shape="Nitrate") +
  theme( legend.position = c(0.7,0.7), legend.box="horizontal", 
         legend.background = element_rect(size=1,linetype="solid",color="black"))

Detecting patterns in scatterplots

ggplot(sea_comb, aes(x = Bact, y = Virus)) +
  geom_point() +
  geom_smooth(method = "lm") #draws a linear regression line, by default includes confidence envelope
## `geom_smooth()` using formula 'y ~ x'

#Indicate groups within the scatterplot
ggplot(sea_comb, aes(x = Bact, y = Virus, color=as.factor(depth))) +
  geom_point() +
  geom_smooth(method = "lm") +
    scale_colour_discrete(name = "Water Depth (m)",
                      labels = c("10", "50"))
## `geom_smooth()` using formula 'y ~ x'

#Customize the colors using built in pallettes
ggplot(sea_comb, aes(x = Bact, y = Virus,color=as.factor(depth))) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_color_brewer(palette = "Set1",
                     name = "Water Depth (m)",
                      labels = c("10", "50")) # http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/
## `geom_smooth()` using formula 'y ~ x'

Exercise 4

#create a scatterplot of Bacteria and Virus counts, and
#1 group the samples by location
#2 use the scale_* functions to rename the axes and customize the group colors
#3 fit a line to each group and remove the confidence envelope

unique(sea_comb$location)
## [1] coastal   offshore  openOcean gyre     
## Levels: coastal offshore openOcean gyre
ggplot(sea_comb, aes(x = Bact, y = Virus,color=location)) +
  geom_point() +
  geom_smooth(method=lm,se=F) +
  scale_color_brewer(palette = "Dark2",
                     name = "Location",
                      labels = c("Coastal", "Off Shore","Open Ocean", "Gyre")) 
## `geom_smooth()` using formula 'y ~ x'

#4 what could we change to color by location but fit a line to all data points or by depth
ggplot(sea_comb, aes(x = Bact, y = Virus)) +
  geom_point(aes(color=location)) +
  geom_smooth(method=lm,se=F) +
  scale_color_brewer(palette = "Dark2",
                     name = "Location",
                      labels = c("Coastal", "Off Shore","Open Ocean", "Gyre")) 
## `geom_smooth()` using formula 'y ~ x'

ggplot(sea_comb, aes(x = Bact, y = Virus,linetype=as.factor(depth))) +
  geom_point(aes(color=location)) + 
  geom_smooth(method = 'lm',color="black")
## `geom_smooth()` using formula 'y ~ x'

#Extra
#to get the formula and r2 need to do some work, https://stackoverflow.com/questions/7549694/add-regression-line-equation-and-r2-on-graph
BvV<-lm(Bact~Virus,sea_comb)
summary(BvV)
## 
## Call:
## lm(formula = Bact ~ Virus, data = sea_comb)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -898531 -298354  -62101  199847 1307823 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2.600e+05  1.122e+05   2.317   0.0226 *  
## Virus       8.985e-02  4.901e-03  18.335   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 476500 on 98 degrees of freedom
## Multiple R-squared:  0.7743, Adjusted R-squared:  0.772 
## F-statistic: 336.2 on 1 and 98 DF,  p-value: < 2.2e-16
lm_eqn <- function(df){
    m <- lm(Bact ~ Virus, df);
    eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2, 
         list(a = format(unname(coef(m)[1]), digits = 2),
              b = format(unname(coef(m)[2]), digits = 2),
             r2 = format(summary(m)$r.squared, digits = 3)))
    as.character(as.expression(eq));
}

p<-ggplot(sea_comb, aes(x = Bact, y = Virus)) +
  geom_point(aes(color=location)) +
  geom_smooth(method=lm,se=F) +
  scale_color_brewer(palette = "Dark2",
                     name = "Location",
                      labels = c("Coastal", "Off Shore","Open Ocean", "Gyre")) 

p + geom_text(x=1e6,y=3.5e7,label=lm_eqn(sea_comb),parse=T)
## `geom_smooth()` using formula 'y ~ x'

Transforming data

Our data is currently in wide format

- row = participant or sample, column = variable
- gplot and computers don't really like this human readable format

We can transform (reshape) our data to long-format

- row = observation 
- each sample will have multiple rows, one for each observation
- each row is unique when looking at the combination of each measurement
- each variable that shares an axis will typically be in the same column
#show how to "hide" R markdown results
# many ways to reshape data from wide <-> long
head(sea_comb)

#reshape {stats}
reshape(sea_comb,idvar="Row.names",
        varying=c("NH4_nanoM","NO3_nanoM"),v.name=c("Conc_nM"),
        times=c("NH4_nanoM","NO3_nanoM"),
        direction="long")

#tidyverse, pivot_longer
sea_long<-pivot_longer(sea_comb,
             cols = c("NH4_nanoM","NO3_nanoM"),
             names_to="Nitrogen",
             values_to="Conc_nM")

head(sea_long)


#modify column values
sea_long$Nitrogen<-vapply(strsplit(sea_long$Nitrogen,"_"), `[`, 1, FUN.VALUE=character(1))
head(sea_long)

Visualizing our reshaped data

#Splitting figures using grouping variables with facet_grid
ggplot(sea_long, aes(y = Conc_nM,x=location)) +
  geom_point() +
  facet_grid(~Nitrogen)

ggplot(sea_long, aes(x = Conc_nM,y=depth) )+
  geom_point() +
  facet_grid(Nitrogen~location) + # show an example of change the formula
  #facet_grid(~Nitrogen +location)
  scale_y_reverse() 

ggplot(sea_long, aes(y = Conc_nM,x=as.factor(depth))) +
  geom_point() +
  facet_grid(Nitrogen~location, scales="free_y")
 
  
#Splitting figures using grouping variables with facet_wrap
ggplot(sea_long, aes(y = Conc_nM,x=as.factor(depth))) +
  geom_point() +
  facet_wrap(Nitrogen~location) +
  scale_y_reverse() 

ggplot(sea_long, aes(y = Conc_nM,x=as.factor(depth))) +
  geom_point() +
  facet_wrap(~location + Nitrogen, ncol = 2) +
  scale_y_reverse() 

Exercise 5

#Create a facet_grid plot that does the following:

#1 Has concentration on the y-axis
#2 Is faceted by sample location
#3 Has clearly labeled axes
#4 An improved theme
#5 Different colors for each sample depth

sea_long$Nitrogen<-factor(sea_long$Nitrogen,levels=c("NO3", "NH4"))

# New facet label names for dose variable
loc.labs <- c("Coastal", "Offshore", "Open Ocean", "Gyre")
names(loc.labs) <- c("coastal", "offshore", "openOcean","gyre")


ggplot(sea_long, aes(y = Conc_nM,x=Nitrogen)) +
  geom_point(aes(col=as.factor(depth))) +
  facet_grid(~location,
              labeller = labeller(location = loc.labs)) +
  xlab("Nitrogen Species") + ylab("concentration (nM)") +
  theme_bw() + labs(color = "Water Depth (m)") +
  theme(strip.background =element_rect(fill="white"))+
  theme(strip.text = element_text(colour = 'Black',face = "bold"))

Create overlapping data figures with reshaped data

#Compare the distribution of data between groups
ggplot(sea_long, aes(x = Conc_nM, fill = Nitrogen)) +
  geom_density(alpha = 0.75)+
  scale_x_continuous(name = "Nitrogen Species (nM)") +
  scale_fill_discrete(name = "Nitrogen Species",
                      labels = c("NO3", "NH4")) #be careful here because you have now reordered the levels

#Customize the colors
cols<-c("red","blue")
#cols <- c("#F76D5E", "#72D8FF")

ggplot(sea_long, aes(x = Conc_nM, fill = Nitrogen)) +
  geom_density(alpha = 0.75)+
  scale_x_continuous(name = "Nitrogen Species (nM)") +
  scale_fill_discrete(name = "Nitrogen Species",
                      labels = c("NO3", "NH4")) +
  scale_fill_manual(values=cols)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.

ggplot(sea_long, aes(x = Conc_nM, fill = Nitrogen)) +
  geom_density(alpha = 0.75)+
  scale_x_continuous(name = "Nitrogen Species (nM)",expand=expansion(mult=c(0.01,.01))) +
  scale_y_continuous(expand=expansion(mult=c(0.01,.01)))+
  scale_fill_discrete(name = "Nitrogen Species",
                      labels = c("Nitrate", "Ammonium")) +
  scale_fill_manual(values=c("black","lightgray")) +
  theme_bw()
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.

#Focus on one variable
ggplot(subset(sea_long,Nitrogen == "NH4"), aes(x = Conc_nM, fill = location)) + #could also create a new subsetted dataframe
  geom_density(alpha = 0.75)+
  scale_x_continuous(name = "Ammonium Concentration (nM)",expand=expansion(mult=c(0.01,.01))) +
  scale_y_continuous(expand=expansion(mult=c(0.01,.01)))+
  scale_fill_discrete(name = "Location",
                      labels = c("Coastal", "Off Shore","Open Ocean","Gyre")) +
  theme_bw()

Specify axis breaks

#could specify the breaks we want exactly
ggplot(subset(sea_long,Nitrogen == "NH4"), aes(x = Conc_nM, fill = location)) + #could also create a new subsetted dataframe
  geom_density(alpha = 0.75)+
  scale_x_continuous(name = "Ammonium Concentration (nM)",expand=expansion(mult=c(0.01,.01)),
                     breaks=c(0,50,100,150,200,250,300,350,400,450)) +
  scale_y_continuous(expand=expansion(mult=c(0.01,.01)))+
  scale_fill_discrete(name = "Location",
                      labels = c("Coastal", "Off Shore","Open Ocean","Gyre")) +
  theme_bw()

#more efficiently, we could use the sequence function, seq()
ggplot(subset(sea_long,Nitrogen == "NH4"), aes(x = Conc_nM, fill = location)) + #could also create a new subsetted dataframe
  geom_density(alpha = 0.75)+
  scale_x_continuous(name = "Ammonium Concentration (nM)",expand=expansion(mult=c(0.01,.01)),
                     breaks=seq(0,450,by=50)) + #notice behaviour if we change past the natural x-limits
  scale_y_continuous(expand=expansion(mult=c(0.01,.01)))+
  scale_fill_discrete(name = "Location",
                      labels = c("Coastal", "Off Shore","Open Ocean","Gyre")) +
  theme_bw()

ggplot(subset(sea_long,Nitrogen == "NH4"), aes(x = Conc_nM, fill = location)) + #could also create a new subsetted dataframe
  geom_density(alpha = 0.75)+
  scale_x_continuous(name = "Ammonium Concentration (nM)",expand=expansion(mult=c(0.01,.01)), limits = c(0,500),
                     breaks=seq(0,500,by=50)) + #notice behaviour if we change past the natural x-limits
  scale_y_continuous(expand=expansion(mult=c(0.01,.01)))+
  scale_fill_discrete(name = "Location",
                      labels = c("Coastal", "Off Shore","Open Ocean","Gyre")) +
  theme_bw()

#check the range to make sure we are capturing all the data
range(subset(sea_long,Nitrogen == "NH4")$Conc_nM)
## [1]   0.5491473 449.6310512

Exercise 6

#Update your sea_long dataset to include the bacteria and viral counts in long format
#Create a facetted plot of the microbial counts by location , colored by depth, and has improved labels and colors
sea_long2<-pivot_longer(sea_long,
             cols = c("Bact","Virus"),
             names_to="Microbe",
             values_to="Counts")

head(sea_long2)
## # A tibble: 6 × 9
##   Row.names location depth NH4_stat NO3_stat Nitrogen Conc_nM Microbe   Counts
##   <I<chr>>  <fct>    <dbl> <chr>    <chr>    <fct>      <dbl> <chr>      <int>
## 1 S001      coastal     10 replete  depleted NH4         379. Bact     3210498
## 2 S001      coastal     10 replete  depleted NH4         379. Virus   35013025
## 3 S001      coastal     10 replete  depleted NO3         517. Bact     3210498
## 4 S001      coastal     10 replete  depleted NO3         517. Virus   35013025
## 5 S002      coastal     10 replete  depleted NH4         312. Bact     3607997
## 6 S002      coastal     10 replete  depleted NH4         312. Virus   36342384
ggplot(sea_long2, aes(y = Counts,x=Microbe)) +
  geom_point(aes(col=as.factor(depth))) +
  facet_grid(~location,
              labeller = labeller(location = loc.labs)) +
  xlab("Microorganism") + ylab("Abundance (particle/ml)") +
  scale_color_brewer(palette = "Set2")+
  theme_bw() + labs(color = "Water Depth (m)") +
  theme(strip.background =element_rect(fill="white"))+
  theme(strip.text = element_text(colour = 'Black',face = "bold"))

#Extra if we want to improve the look of the points to not lay on top of each other
ggplot(sea_long2, aes(y = Counts,x=Microbe)) +
  geom_jitter(aes(col=as.factor(depth))) +
  facet_grid(~location,
              labeller = labeller(location = loc.labs)) +
  xlab("Microorganism") + ylab("Abundance (particle/ml)") +
  scale_color_brewer(palette = "Set2")+
  theme_bw() + labs(color = "Water Depth (m)") +
  theme(strip.background =element_rect(fill="white"))+
  theme(strip.text = element_text(colour = 'Black',face = "bold"))

#Want each microbe on own scale
ggplot(sea_long2, aes(y = Counts,x=location)) +
  geom_jitter(aes(col=as.factor(depth))) +
  facet_wrap(~Microbe,
              labeller = labeller(location = loc.labs),scales = "free_y") +
  xlab("Location") + ylab("Abundance (particle/ml)") +
  scale_color_brewer(palette = "Set2")+
  theme_bw() + labs(color = "Water Depth (m)") +
  theme(strip.background =element_rect(fill="white"))+
  theme(strip.text = element_text(colour = 'Black',face = "bold"))

Boxplots & violin plots

ggplot(sea_long2, aes(x = location, y = Counts)) +
  facet_wrap(~Microbe,scales="free_y")+
  geom_boxplot()

ggplot(sea_long2, aes(x = location, y = Counts)) +
  facet_wrap(~Microbe,scales="free_y")+
  geom_violin()

#can combine the two
ggplot(sea_long2, aes(x = location, y = Counts)) +
  facet_wrap(~Microbe,scales="free_y")+
  geom_boxplot() +
  geom_violin(alpha=0.2)

ggplot(sea_long2, aes(x = location, y = Counts)) +
  facet_wrap(~Microbe,scales="free_y")+
  geom_violin() +
  geom_boxplot(alpha=0.2, width=0.2)

#Exercise 7

#Improve the labels, colors , and theme of the combined box & violin plot
bp<-ggplot(sea_long2, aes(x = location, y = Counts,fill=location)) +
  facet_wrap(~Microbe,scales="free_y")+
  geom_violin(alpha=0.25) +
  geom_boxplot(alpha=0.75, width=0.2)

bp +  scale_fill_brewer(palette = "Dark2",name = "Location",
                      labels = c("Coastal", "Off Shore","Open Ocean","Gyre")) +
  theme_bw() + labs(color = "Location") +
  theme(strip.background =element_rect(fill="white"))+
  theme(strip.text = element_text(colour = 'Black',face = "bold")) +
  scale_x_discrete(name = "Location",
                      labels = c("Coastal", "Off Shore","Open Ocean","Gyre"))+
  scale_y_continuous(name="Abundance (cell/ml)")+
  guides(x=guide_axis(angle=45))

  #if i waned to change the y-axis numbers https://stackoverflow.com/questions/10762287/how-can-i-format-axis-labels-with-exponents-with-ggplot2-and-scales